/*******************************************************************************
DO FILE SUMMARY

This file produces forest plots for the Puente diabetes paper.

November 4th, 2023 (Stephen running David's code)

David Flood
University of Michigan
Wuqu' Kawoq
*******************************************************************************/

* Session setup: empty the workspace, clear the Results window, pin the
* interpreter version, and close any log left open by a previous run.
clear
cls
version 18
set more off
capture log close
set showbaselevels

* Community-contributed packages used by this do-file.
* Install a package only when its command cannot be found, so the file can be
* re-run without network access and does not stop when a copy is already
* installed.
*   metan     - NOTE(review): presumably the package that ships forestplot; confirm
*   leftalign - provides the leftalign command called further down
foreach pkg in metan leftalign {
	capture which `pkg'
	if _rc ssc install `pkg'
}
 
* Graph appearance: Arial as the graph window font, s1color as the scheme
graph set window fontface "Arial"
set scheme s1color

* Working directory is machine-specific; enable the line that applies
// cd "/Users/dcflood/Library/CloudStorage/Dropbox-Personal/Guatemala/Puente cardiometabolic paper/Forest plots"
// cd "/Users/davidflood/Dropbox (Personal)/Guatemala/Puente cardiometabolic paper/Forest plots"

* Store the current date, formatted as CCYY.NN.DD, in the global macro date.
* It is built from the system macro $S_DATE and is meant for date-stamping
* saved file names.
global date : display %tdCCYY.NN.DD date("$S_DATE", "DMY")

/*******************************************************************************
Figure 1
*******************************************************************************/

* Earlier input files, kept for reference:
// import excel "/Users/davidflood/Dropbox (Personal)/Guatemala/Puente cardiometabolic paper/Forest plots/Forest Plot Data (for David) v2.xlsx", sheet("Sheet1") firstrow cellrange(A1:J32) clear
//import excel "/Users/dcflood/Library/CloudStorage/Dropbox-Personal/Guatemala/Puente cardiometabolic paper/Forest plots/Forest Plot Data (for David) v2.xlsx", sheet("Sheet1") firstrow cellrange(A1:J32) clear

* Load cells A1:J36 of Sheet1, taking variable names from the first row and
* replacing whatever data are in memory. The file is looked up relative to
* the current working directory.
import excel using "HTN Forest Plot Table (Formatted) 11292024.xlsx", ///
	sheet("Sheet1") cellrange(A1:J36) firstrow clear

* Log-transform the risk ratio and its confidence limits (intended for the
* eform display in forestplot). Note that, despite the _exp suffix, these
* variables hold natural logs.
foreach stat in est lci uci {
	gen rr_`stat'_exp = log(rr_`stat')
}

* For each measure (prevalence, absolute difference, risk ratio) build
* one-decimal string versions of the estimate and its limits, then combine
* them into a display column of the form "est (lci to uci)".
foreach measure in prev abs rr {
	foreach stat in est lci uci {
		gen `measure'_`stat'_s = string(`measure'_`stat', "%9.1f")
	}
	gen output_`measure'_s = `measure'_est_s + " (" + `measure'_lci_s + " to " + `measure'_uci_s + ")"
}

* Rows without an estimate get an empty cell instead of ". (. to .)"
foreach measure in prev abs rr {
	replace output_`measure'_s = "" if `measure'_est == .
}

* Reference categories show a fixed "(Ref)" entry in the two comparison
* columns. This runs after the blanking step above so the reference text is
* what remains in those rows.
local ref_levels `""18-29","18.5 to 24.9","Non-indigenous","Spanish","Rural","1 (Most likely in poverty)""'
replace output_abs_s = "0 (Ref)" if inlist(characteristics, `ref_levels')
replace output_rr_s = "1 (Ref)" if inlist(characteristics, `ref_levels')

* Realigning (leftalign is a community-contributed command; presumably it
* left-aligns the string variables for display)
leftalign

* Row-type codes read by forestplot; see its help file for what each _USE
* value refers to. Every row starts as a regular data row.
gen _USE = 1

* _USE = 0 marks the heading rows. They are identified by row number, so these
* numbers must be adjusted whenever the layout of the spreadsheet changes,
* otherwise the graph will be wrong.
replace _USE = 0 if inlist(_n,1,5,13,19,23,27,31)

* Rows with no label text get _USE = 6. The variable name is written out in
* full so the line does not depend on variable abbreviation being enabled.
replace _USE = 6 if missing(characteristics)

* Bold labels for the displayed columns. Each nested compound-quoted string
* in a label is one line of a two-line label.
label variable characteristics `"{bf:Characteristic}"'
label variable output_prev_s `"`"{bf:Prevalence of}"' `"{bf:hypertension (%), (95% CI)}"'"'
label variable output_abs_s `"`"{bf:Absolute}"' `"{bf:difference (%), (95% CI)}"'"'
label variable output_rr_s `"`"{bf:Risk}"' `"{bf:ratio, (95% CI)}"'"'

* Wrap each section heading in {bf:} so it is shown in bold
foreach heading in "Overall" "Age group (years)" "Ethnicity" "Language" "Residence" "Economic status (quartiles)" {
	replace characteristics = "{bf:`heading'}" if characteristics == "`heading'"
}

* The BMI heading is handled on its own because the displayed text differs
* from the stored text: the trailing 2 becomes a superscript
replace characteristics = "{bf:BMI category (kg/m{superscript:2})}" if characteristics == "BMI category (kg/m2)"

/*	Note: This code uses the "forestplot" program built by David Fisher. He has
	written a number of helpful posts on Statalist, for example:
	https://www.statalist.org/forums/forum/general-stata-discussion/general/1471066-editing-headings-on-metan-forest-plots
*/

* Shared sizes for the plot elements
local line_size ".25rs"
local forest_font_size "2.3"
* Not referenced by any active option at the moment
local arrow_font_size "2"

* graph a
* Forest plot of the prevalence estimates with their confidence limits, with
* the three formatted text columns shown alongside. Every line of the command
* except the last must end in the continuation marker; the line before name()
* used to lack it, which ended the command early and left name() to be run as
* a separate, invalid command.
forestplot prev_est prev_lci prev_uci, ///
	labels(characteristics) ///
	/// eform ///
	nostats /// this tells it not to calculate stats but rather use raw input data
	rcols(output_prev_s output_abs_s output_rr_s) /// this tells forestplot which columns to display
	range(0 55) /// range of the plot
	/// null(0) ///
	xlabel(0(10)50, labsize(`forest_font_size')) /// plot labels xmtick(0.3(0.1)1.5) ///
	astext(80) ///
	spacing(1.2) /// this refers to spacing by line
	/// style options
		diamopts(lwidth(`line_size')) ///
		boxopts(mcolor(none)) ///
		ciopts(lwidth(`line_size')) ///
		/// nlineopts(lwidth(.2rs)) ///
		olineopts(lwidth(0)) ///
		pointopts(msymbol(square) msize(.4rs)) ///
	plotregion(margin(l=0 r=0 b=0rs t=0) lcolor(none)) ///
	graphregion(margin(l=0 r=0 b=0rs t=0)) ///
	xsize(4) ///
	ysize(3) ///
	/// xtitle("Adjusted odds ratio", size(2.3)  margin(l=0 r=64 b=0 t=2)) ///
	/// favours("Less rural     " "achievement      " # "     More rural" "     achievement", labsize(2.1rs) nosymmetric) ///
	title("Figure 1. Prevalence of hypertension in a clinical population of primarily rural and Indigenous women in Guatemala," ///
		"overall and by demographic characteristic", size(2)) ///
	name(figure1, replace)

* Post-processing with gr_edit, followed by the export.
* NOTE(review): the object paths below (plotregion1, plot3) depend on how
* forestplot assembles the graph; re-check them if the plot options change.
// gr_edit .plotregion1._xylines[1].style.editstyle linestyle(color(black)) editcopy
* Give the line of plot3 a short-dash pattern
gr_edit .plotregion1.plot3.style.editstyle line(pattern(shortdash)) editcopy
* Set the shade colour of the top-level box style to none
gr_edit .style.editstyle boxstyle(shadestyle(color(none))) editcopy
// gr_edit .plotregion1.plot3.style.editstyle line(color(none)) editcopy
	
* Date-stamped outputs (currently disabled)
// graph save "Figure plot figures/figure1_${date}.gph", replace
// graph export "Figure plot figures/figure1_${date}.pdf", as(pdf) name(figure1) replace

* Export the graph to plot.png, 2400 pixels wide, overwriting any existing file
graph export "plot.png", width(2400) replace